load data

USE_DC <- TRUE

text = read.csv('/Users/roso8920/Emotive Computing Dropbox/Rosy Southwell/EML Rosy/EMLLM/info/ia_label_mapping_opt_surprisal.csv')
if (USE_DC){eeg_dir = '/Volumes/Blue1TB/EEG_processed/unfolded_FRP_reparsed_v6/n400_stats_recomputed/'
} else {eeg_dir = '/Volumes/Blue1TB/EEG_processed/unfolded_FRP_reparsed_v6/n400_stats_nodc/'
}
file_pat = '_reading_N400_stats\\.csv$'
files=list.files(path = eeg_dir, pattern= file_pat)

Loop over participants

all<-c()
i<-0
for (f in files){
    i=i+1
    df <-read.csv(paste0(eeg_dir,f))
    p <- strsplit(f,'_')
    pID <- paste(p[[1]][1],p[[1]][2],sep='_')
    df$pID <- pID
    # df<- df %>% dplyr::select(c("pID", "type","latency","fixDur","task","identifier" , "page_fixation_ix","IA_ID","n400_magnitude_CPz","n400_latency_CPz"  ))
    df<- df %>% filter(task=='reading') %>% droplevels()
    all[[i]] <- df
}
df <- do.call(rbind, all)

# Merge with text info for surprisal values
df <- df %>% merge(text, on=c("IA_ID","identifier"))
df <- df %>% rename(surprisalText=opt.125m_surprisal_wholetext, surprisalPage = opt.125m_surprisal_page,fixDur=duration)
# drop rows with no IA
df <- drop_na(df, "IA_LABEL")
# drop rows with punc
df <- df %>% filter(! IA_LABEL %in% c('.',',','-','--',';',':',"'",'?','!'))

feats <- grep("n400|fixDur",colnames(df), value=T)
feats_eeg <- grep("n400",colnames(df), value=T)

transforms

# df<-df %>% mutate(across(contains('n400_magnitude'),
#              .fns=~log(-.x)), .names="log_neg_{.col}")
df<-df %>% mutate(logFixDur = log(fixDur),
                  logSurprisalText = log(surprisalText),
                  logSurprisalPage = log(surprisalPage))
# lagged surprisal
df <- df %>% group_by(pID, identifier) %>% arrange(page_fixation_ix, .by_group=T) %>% 
  mutate(prev_surprisalText = dplyr::lag(surprisalText, n=1, default=NA),
         prev_surprisalPage = dplyr::lag(surprisalPage, n=1, default=NA),
         prev_logSurprisalText = dplyr::lag(logSurprisalText, n=1, default=NA),
         prev_logSurprisalPage = dplyr::lag(logSurprisalPage, n=1, default=NA),
         )
# refixation
df <- df %>% group_by(pID, identifier, IA_ID) %>% mutate(
  IA_fix_count = n(),
  refixation=as.factor(ifelse(n()>1,1,0)))

plots

df_long <- df %>% pivot_longer(all_of(c(feats,'logFixDur')), names_to="feature",values_to="value")
df_long <- df_long %>% mutate(channel=str_match(feature, 'n400_\\w*_([[:alnum:]]{2,5})$')[,2],
                              feature_type=str_match(feature, '(n400_\\w*)_[[:alnum:]]{2,5}$')[,2])
# compute lower and upper whiskers for each group
ylims <- df_long %>%
  group_by(feature_type) %>%
  summarise(Q1 = quantile(value, 1/100), Q3 = quantile(value, 99/100)) %>%
  ungroup()



p1<-ggplot(df_long %>% filter(feature_type=='n400_mean')) + 
  geom_density(aes(x=value, fill=channel), alpha=0.3) + xlim(-1000,1000) + ggtitle('n400_mean')
p2<-ggplot(df_long %>% filter(feature_type=='n400_min_magnitude')) + 
  geom_density(aes(x=value, fill=channel), alpha=0.3) + xlim(-1000,1000) + ggtitle('n400_min')
p3<-ggplot(df_long %>% filter(feature_type=='n400_max_magnitude')) + 
  geom_density(aes(x=value, fill=channel), alpha=0.3) + xlim(-1000,1000) + ggtitle('n400_max')
grid.arrange(p1,p2,p3)

ggplot(df)+
    geom_density(aes(x=fixDur))+
  scale_x_log10()

ggplot(df)+
    geom_density(aes(x=surprisalText))+
  scale_x_log10()

ggplot(df_long)+
    geom_violin(aes(x=value,y=factor(channel), fill=factor(channel),outliers = FALSE), alpha=0.3)+
  facet_wrap(~feature_type, scales="free") 

ggplot(df_long %>% filter(feature_type =='n400_mean'))+
    geom_point(aes(x=surprisalText,y=value, color=factor(channel)), alpha=0.05, size=0.5)+
  facet_wrap(~channel, scales="free") 

repeat some plots after transformatiions

ggplot(df,aes(x=logSurprisalText, y=logFixDur)) + 
  geom_point(size=1, alpha=0.01)+stat_cor(method="pearson")

ggplot(df_long %>% filter(feature_type =='n400_mean'),aes(x=logSurprisalText,y=value, color=factor(channel)))+
    geom_point(alpha=0.01, size=0.5)+stat_cor(method="pearson", size=2, r.accuracy=0.01, p.accuracy=0.001, label.x=-7.5, label.y=1000)+
  facet_wrap(~channel, scales="free") 

ggplot(df_long %>% filter(feature_type =='n400_max_magnitude'),aes(x=logSurprisalText,y=value, color=factor(channel)))+
    geom_point(alpha=0.01, size=0.5)+stat_cor(method="pearson", size=2, r.accuracy=0.01, p.accuracy=0.001, label.x=-7.5, label.y=1000)+
  facet_wrap(~channel, scales="free") 

ggplot(df_long %>% filter(feature_type =='n400_min_magnitude'),aes(x=logSurprisalText,y=value, color=factor(channel)))+
    geom_point(alpha=0.01, size=0.5)+stat_cor(method="pearson", size=2, r.accuracy=0.01, p.accuracy=0.001, label.x=-7.5, label.y=1000)+
  facet_wrap(~channel, scales="free") 

LME stats

predict EEG from LOG surprisal and fixDur

m.l.m <- lmer(n400_mean_CPz ~ (1|identifier) + (1|pID) + logFixDur + logSurprisalText, data=df)
m.l.min <- lmer(n400_max_magnitude_CPz ~ (1|identifier) + (1|pID) + logFixDur + logSurprisalText, data=df)
m.l.max <- lmer(n400_min_magnitude_CPz ~ (1|identifier) + (1|pID) + logFixDur + logSurprisalText, data=df)
m.l.zc <- lmer(n400_zero_crossings_CPz ~ (1|identifier) + (1|pID) + logFixDur + logSurprisalText, data=df)
tab_model(m.l.m, m.l.min, m.l.max, m.l.zc, show.ci=F)
  n 400 mean C Pz n 400 max magnitude C Pz n 400 min magnitude C Pz n 400 zero crossings C Pz
Predictors Estimates p Estimates p Estimates p Estimates p
(Intercept) -2.03 0.592 71.61 <0.001 -79.35 <0.001 2.13 <0.001
logFixDur 0.59 0.390 0.49 0.484 1.06 0.157 -0.01 0.542
logSurprisalText -0.82 0.001 -0.91 <0.001 -0.67 0.009 -0.00 0.975
Random Effects
σ2 10319.24 10647.05 12018.42 5.00
τ00 44.81 pID 1083.91 pID 493.09 pID 0.52 pID
25.46 identifier 29.95 identifier 30.35 identifier 0.02 identifier
ICC 0.01 0.09 0.04 0.10
N 50 identifier 50 identifier 50 identifier 50 identifier
91 pID 91 pID 91 pID 91 pID
Observations 104272 104272 104272 104272
Marginal R2 / Conditional R2 0.000 / 0.007 0.000 / 0.095 0.000 / 0.042 0.000 / 0.096
dotplot(ranef(m.l.m))
## $pID

## 
## $identifier

predict fixDur from surprisal

m.d <- lmer(fixDur ~ (1|identifier) + (1|pID) + logSurprisalText, data=df)
tab_model(m.d)
  fix Dur
Predictors Estimates CI p
(Intercept) 208.40 202.58 – 214.22 <0.001
logSurprisalText 1.81 1.38 – 2.24 <0.001
Random Effects
σ2 8839.40
τ00 pID 729.69
τ00 identifier 31.32
ICC 0.08
N identifier 50
N pID 91
Observations 104272
Marginal R2 / Conditional R2 0.001 / 0.080

predict fixation fixDur from EEG features

m.eye <- lmer(fixDur ~ (1|identifier) + (1|pID) + n400_mean_CPz, data=df)
tab_model(m.eye)
  fix Dur
Predictors Estimates CI p
(Intercept) 210.20 204.39 – 216.01 <0.001
n400 mean CPz 0.01 0.00 – 0.01 0.002
Random Effects
σ2 8844.36
τ00 pID 729.84
τ00 identifier 32.26
ICC 0.08
N identifier 50
N pID 91
Observations 104272
Marginal R2 / Conditional R2 0.000 / 0.079

predict EEG and surprisal from previous IA surprisal

mp.m <- lmer(n400_mean_CPz ~ (1|identifier) + (1|pID) + logFixDur + logSurprisalText + prev_logSurprisalText, data=df)
tab_model(mp.m)
  n 400 mean C Pz
Predictors Estimates CI p
(Intercept) -1.58 -9.06 – 5.90 0.678
logFixDur 0.60 -0.76 – 1.97 0.385
logSurprisalText -0.77 -1.23 – -0.30 0.001
prev logSurprisalText -0.53 -1.00 – -0.06 0.026
Random Effects
σ2 10266.18
τ00 pID 45.97
τ00 identifier 25.85
ICC 0.01
N identifier 50
N pID 91
Observations 102838
Marginal R2 / Conditional R2 0.000 / 0.007
mp.s <- lm( logSurprisalText ~ prev_logSurprisalText, data=df)
tab_model(mp.s)
  log Surprisal Text
Predictors Estimates CI p
(Intercept) 0.89 0.88 – 0.90 <0.001
prev logSurprisalText 0.15 0.14 – 0.15 <0.001
Observations 102838
R2 / R2 adjusted 0.022 / 0.022

model including bool for refixation

mr.m <- lmer(n400_mean_CPz ~ (1|identifier) + (1|pID) + logFixDur + refixation*logSurprisalText, data=df)
tab_model(mr.m)
  n 400 mean C Pz
Predictors Estimates CI p
(Intercept) -2.94 -10.43 – 4.55 0.442
logFixDur 0.62 -0.74 – 1.97 0.373
refixation [1] 1.42 -0.18 – 3.03 0.082
logSurprisalText -0.46 -1.20 – 0.28 0.225
refixation [1] *
logSurprisalText
-0.65 -1.59 – 0.29 0.176
Random Effects
σ2 10319.12
τ00 pID 44.82
τ00 identifier 25.40
ICC 0.01
N identifier 50
N pID 91
Observations 104272
Marginal R2 / Conditional R2 0.000 / 0.007
Anova(mr.m)
## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: n400_mean_CPz
##                               Chisq Df Pr(>Chisq)    
## logFixDur                    0.7943  1  0.3727947    
## refixation                   1.4121  1  0.2347109    
## logSurprisalText            12.9557  1  0.0003189 ***
## refixation:logSurprisalText  1.8291  1  0.1762313    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot_model(mr.m,type = "emm", terms=c("logSurprisalText", "refixation"))

tb<-tableone::CreateTableOne(data = df, vars = c("n400_mean_CPz", "logFixDur"), strata = 'refixation', test=T)
knitr::kable(print(tb))
##                            Stratified by refixation
##                             0              1              p      test
##   n                         36498          67774                     
##   n400_mean_CPz (mean (SD)) -0.14 (104.67)  0.48 (100.36)  0.351     
##   logFixDur (mean (SD))      5.27 (0.44)    5.24 (0.49)   <0.001
0 1 p test
n 36498 67774
n400_mean_CPz (mean (SD)) -0.14 (104.67) 0.48 (100.36) 0.351
logFixDur (mean (SD)) 5.27 (0.44) 5.24 (0.49) <0.001

predict EEG from surprisal and fixDur with random slope of surprisal for participant

m.rs.m <- lmer(n400_mean_CPz ~ (1+logSurprisalText|pID) + (1|identifier) + logSurprisalText + logFixDur, data=df)
m.rs.min <- lmer(n400_max_magnitude_CPz ~ (1+logSurprisalText|pID) +  (1|identifier) + logFixDur + logSurprisalText, data=df)
m.rs.max <- lmer(n400_min_magnitude_CPz ~(1+logSurprisalText|pID) +  (1|identifier) + logFixDur + logSurprisalText, data=df)
tab_model(m.rs.m, m.rs.min, m.rs.max, show.ci=F)
  n 400 mean C Pz n 400 max magnitude C Pz n 400 min magnitude C Pz
Predictors Estimates p Estimates p Estimates p
(Intercept) -2.02 0.594 71.59 <0.001 -79.40 <0.001
logSurprisalText -0.85 0.005 -0.92 0.004 -0.67 0.029
logFixDur 0.61 0.378 0.51 0.473 1.08 0.151
Random Effects
σ2 10313.06 10639.21 12013.52
τ00 60.26 pID 1080.25 pID 507.62 pID
25.69 identifier 30.13 identifier 30.55 identifier
τ11 3.27 pID.logSurprisalText 4.14 pID.logSurprisalText 2.63 pID.logSurprisalText
ρ01 -0.65 pID -0.01 pID -0.22 pID
ICC 0.01 0.10 0.04
N 91 pID 91 pID 91 pID
50 identifier 50 identifier 50 identifier
Observations 104272 104272 104272
Marginal R2 / Conditional R2 0.000 / 0.007 0.000 / 0.095 0.000 / 0.042
dotplot(ranef(m.rs.m))
## $pID

## 
## $identifier

Modeling with comprehension/MW

Load Behavioural data

df_comp <- read.csv('/Users/roso8920/Emotive Computing Dropbox/Rosy Southwell/EyeMindLink/Processed/Behaviour/EML1_page_level.csv')
df_comp <- df_comp %>% rename(pID=ParticipantID) %>% 
  mutate(Page=PageNum-1,
         identifier=paste0(Text, Page))
df <- merge(df, df_comp, on=c("pID","identifier"))
df_mw <- df %>% drop_na(MW) %>% mutate(MW=as.factor(MW))

model FIXATION LEVEL N400 ~ surprisal*MW (caveat - MW is at page level)

m.mw <- lmer(n400_mean_CPz ~ (1+logSurprisalText|pID) + (1|identifier)  + logSurprisalText*MW , data=df_mw)
Anova(m.mw)
## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: n400_mean_CPz
##                      Chisq Df Pr(>Chisq)
## logSurprisalText    0.1782  1     0.6730
## MW                  0.5418  1     0.4617
## logSurprisalText:MW 1.4514  1     0.2283
tab_model(m.mw)
  n 400 mean C Pz
Predictors Estimates CI p
(Intercept) 2.44 -2.78 – 7.65 0.360
logSurprisalText 0.25 -1.13 – 1.63 0.724
MW [1] 3.12 -1.63 – 7.86 0.198
logSurprisalText * MW [1] -1.32 -3.47 – 0.83 0.228
Random Effects
σ2 9813.79
τ00 pID 264.34
τ00 identifier 48.37
τ11 pID.logSurprisalText 5.19
ρ01 pID -0.89
ICC 0.03
N pID 91
N identifier 20
Observations 22322
Marginal R2 / Conditional R2 0.000 / 0.026
plot_model(m.mw,type = "emm", terms=c("logSurprisalText", "MW"))

plot_model(m.mw,type = "emm", terms=c("logFixDur", "MW"))
## `logFixDur` was not found in model terms. Maybe misspelled?
## Can't compute estimated marginal means, 'emmeans::emmeans()' returned an error.
## 
## Reason: No variable named logFixDur in the reference grid
## You may try 'ggpredict()' or 'ggeffect()'.
## NULL
emmeans(m.mw, pairwise ~ MW, p.adjust = "fdr")
## $emmeans
##  MW emmean   SE  df asymp.LCL asymp.UCL
##  0    2.70 2.42 Inf    -2.042      7.44
##  1    4.41 2.65 Inf    -0.782      9.61
## 
## Degrees-of-freedom method: asymptotic 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast estimate   SE  df z.ratio p.value
##  0 - 1       -1.72 2.01 Inf  -0.852  0.3942
## 
## Degrees-of-freedom method: asymptotic

bobby’s mode: MW label at page level, surprosal and n400 at fixation level

# mB <- allFit(lmer(n400_mean_CPz ~ 1 + MW*logSurprisalText + (MW*logSurprisalText|pID) + (logSurprisalText|pID:identifier), data=df_mw))
mB <- lmer(n400_mean_CPz ~ 1 + MW*logSurprisalText + (MW*logSurprisalText|pID) + (logSurprisalText|pID:identifier), data=df_mw, control=lmerControl(optimizer='Nelder_Mead'))
summary(mB)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: n400_mean_CPz ~ 1 + MW * logSurprisalText + (MW * logSurprisalText |  
##     pID) + (logSurprisalText | pID:identifier)
##    Data: df_mw
## Control: lmerControl(optimizer = "Nelder_Mead")
## 
## REML criterion at convergence: 268150.6
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -7.8036 -0.5495  0.0163  0.5553  6.1471 
## 
## Random effects:
##  Groups         Name                 Variance Std.Dev. Corr             
##  pID:identifier (Intercept)          1439.761 37.944                    
##                 logSurprisalText        8.569  2.927   -1.00            
##  pID            (Intercept)           148.040 12.167                    
##                 MW1                   283.648 16.842   -0.49            
##                 logSurprisalText        4.536  2.130    0.30 -0.89      
##                 MW1:logSurprisalText    3.799  1.949   -0.79  0.32 -0.44
##  Residual                            9371.358 96.806                    
## Number of obs: 22322, groups:  pID:identifier, 301; pID, 91
## 
## Fixed effects:
##                      Estimate Std. Error       df t value Pr(>|t|)
## (Intercept)            3.1841     3.4506  71.6234   0.923    0.359
## MW1                    5.8268     5.6940  76.4198   1.023    0.309
## logSurprisalText       0.4260     0.7361  89.4178   0.579    0.564
## MW1:logSurprisalText  -1.4919     1.1792 144.0161  -1.265    0.208
## 
## Correlation of Fixed Effects:
##             (Intr) MW1    lgSrpT
## MW1         -0.588              
## lgSrprslTxt -0.397  0.200       
## MW1:lgSrprT  0.229 -0.489 -0.602
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.54122 (tol = 0.002, component 1)
tab_model(mB)
  n 400 mean C Pz
Predictors Estimates CI p
(Intercept) 3.18 -3.58 – 9.95 0.356
MW [1] 5.83 -5.33 – 16.99 0.306
logSurprisalText 0.43 -1.02 – 1.87 0.563
MW [1] * logSurprisalText -1.49 -3.80 – 0.82 0.206
Random Effects
σ2 9371.36
τ00 pID:identifier 1439.76
τ00 pID 148.04
τ11 pID:identifier.logSurprisalText 8.57
τ11 pID.MW1 283.65
τ11 pID.logSurprisalText 4.54
τ11 pID.MW1:logSurprisalText 3.80
ρ01 pID:identifier -1.00
ρ01 pID.MW1 -0.49
ρ01 pID.logSurprisalText 0.30
ρ01 pID.MW1:logSurprisalText -0.79
ICC 0.13
N pID 91
N identifier 20
Observations 22322
Marginal R2 / Conditional R2 0.000 / 0.131

min n400

# mB <- allFit(lmer(n400_mean_CPz ~ 1 + MW*logSurprisalText + (MW*logSurprisalText|pID) + (logSurprisalText|pID:identifier), data=df_mw))
mB.min <- lmer(n400_min_magnitude_CPz ~ 1 + MW*logSurprisalText + (MW*logSurprisalText|pID) + (logSurprisalText|pID:identifier), data=df_mw, control=lmerControl(optimizer='Nelder_Mead'))
summary(mB.min)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## n400_min_magnitude_CPz ~ 1 + MW * logSurprisalText + (MW * logSurprisalText |  
##     pID) + (logSurprisalText | pID:identifier)
##    Data: df_mw
## Control: lmerControl(optimizer = "Nelder_Mead")
## 
## REML criterion at convergence: 271688.9
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -6.7679 -0.5431  0.0392  0.5765  5.4070 
## 
## Random effects:
##  Groups         Name                 Variance Std.Dev. Corr             
##  pID:identifier (Intercept)           1644.07  40.547                   
##                 logSurprisalText        12.70   3.564  -1.00            
##  pID            (Intercept)           2410.96  49.101                   
##                 MW1                  13611.72 116.669   0.30            
##                 logSurprisalText        10.95   3.310  -0.53  0.05      
##                 MW1:logSurprisalText   438.46  20.940  -0.50 -0.96 -0.07
##  Residual                            10873.24 104.275                   
## Number of obs: 22322, groups:  pID:identifier, 301; pID, 91
## 
## Fixed effects:
##                      Estimate Std. Error       df t value          Pr(>|t|)    
## (Intercept)          -73.4987     6.4000  32.9731 -11.484 0.000000000000459 ***
## MW1                    7.5202    16.9979 242.8090   0.442             0.659    
## logSurprisalText       0.2887     0.8523  76.7971   0.339             0.736    
## MW1:logSurprisalText  -1.5319     2.9269  31.9680  -0.523             0.604    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) MW1    lgSrpT
## MW1          0.001              
## lgSrprslTxt -0.456  0.114       
## MW1:lgSrprT -0.189 -0.900 -0.251
## optimizer (Nelder_Mead) convergence code: 4 (failure to converge in 10000 evaluations)
## unable to evaluate scaled gradient
## Model failed to converge: degenerate  Hessian with 1 negative eigenvalues
## failure to converge in 10000 evaluations
tab_model(mB.min)
  n 400 min magnitude C Pz
Predictors Estimates CI p
(Intercept) -73.50 -86.04 – -60.95 <0.001
MW [1] 7.52 -25.80 – 40.84 0.658
logSurprisalText 0.29 -1.38 – 1.96 0.735
MW [1] * logSurprisalText -1.53 -7.27 – 4.21 0.601
Random Effects
σ2 10873.24
τ00 pID:identifier 1644.07
τ00 pID 2410.96
τ11 pID:identifier.logSurprisalText 12.70
τ11 pID.MW1 13611.72
τ11 pID.logSurprisalText 10.95
τ11 pID.MW1:logSurprisalText 438.46
ρ01 pID:identifier -1.00
ρ01 pID.MW1 0.30
ρ01 pID.logSurprisalText -0.53
ρ01 pID.MW1:logSurprisalText -0.50
ICC 0.43
N pID 91
N identifier 20
Observations 22322
Marginal R2 / Conditional R2 0.000 / 0.428

max n400

# mB <- allFit(lmer(n400_mean_CPz ~ 1 + MW*logSurprisalText + (MW*logSurprisalText|pID) + (logSurprisalText|pID:identifier), data=df_mw))
mB.max <- lmer(n400_max_magnitude_CPz ~ 1 + MW*logSurprisalText + (MW*logSurprisalText|pID) + (logSurprisalText|pID:identifier), data=df_mw, control=lmerControl(optimizer='Nelder_Mead'))
summary(mB.max)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## n400_max_magnitude_CPz ~ 1 + MW * logSurprisalText + (MW * logSurprisalText |  
##     pID) + (logSurprisalText | pID:identifier)
##    Data: df_mw
## Control: lmerControl(optimizer = "Nelder_Mead")
## 
## REML criterion at convergence: 269286.9
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -8.5622 -0.5499 -0.0016  0.5598  6.1442 
## 
## Random effects:
##  Groups         Name                 Variance Std.Dev. Corr             
##  pID:identifier (Intercept)           1937.6   44.018                   
##                 logSurprisalText        79.9    8.939  -0.26            
##  pID            (Intercept)          12337.9  111.076                   
##                 MW1                   3898.6   62.439  -0.99            
##                 logSurprisalText       345.2   18.581   1.00 -0.99      
##                 MW1:logSurprisalText  1771.0   42.083  -0.99  0.96 -0.99
##  Residual                             9631.1   98.138                   
## Number of obs: 22322, groups:  pID:identifier, 301; pID, 91
## 
## Fixed effects:
##                      Estimate Std. Error      df t value    Pr(>|t|)    
## (Intercept)           77.4900    12.3284 39.0833   6.286 0.000000206 ***
## MW1                    5.2379     9.3360  9.3264   0.561       0.588    
## logSurprisalText       0.7178     2.2145 12.9793   0.324       0.751    
## MW1:logSurprisalText  -2.1979     4.7796  9.6507  -0.460       0.656    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) MW1    lgSrpT
## MW1         -0.813              
## lgSrprslTxt  0.808 -0.598       
## MW1:lgSrprT -0.837  0.522 -0.891
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## unable to evaluate scaled gradient
## Model failed to converge: degenerate  Hessian with 1 negative eigenvalues
tab_model(mB.max)
  n 400 max magnitude C Pz
Predictors Estimates CI p
(Intercept) 77.49 53.33 – 101.65 <0.001
MW [1] 5.24 -13.06 – 23.54 0.575
logSurprisalText 0.72 -3.62 – 5.06 0.746
MW [1] * logSurprisalText -2.20 -11.57 – 7.17 0.646
Random Effects
σ2 9631.14
τ00 pID:identifier 1937.62
τ00 pID 12337.85
τ11 pID:identifier.logSurprisalText 79.90
τ11 pID.MW1 3898.61
τ11 pID.logSurprisalText 345.24
τ11 pID.MW1:logSurprisalText 1771.00
ρ01 pID:identifier -0.26
ρ01 pID.MW1 -0.99
ρ01 pID.logSurprisalText 1.00
ρ01 pID.MW1:logSurprisalText -0.99
ICC 0.58
N pID 91
N identifier 20
Observations 22322
Marginal R2 / Conditional R2 0.000 / 0.585

Eye-mind-linkage metric at page level

note atanh transform for correlation when it is to be used in model

eml_page <- df %>% group_by(pID, identifier) %>% summarise(
    count=n(),
    cor_n400meanCPz_logSurprisalText = atanh(cor(logSurprisalText, n400_mean_CPz)),
    cor_n400minCPz_logSurprisalText = atanh(cor(logSurprisalText, n400_min_magnitude_CPz)),
    cor_n400maxCPz_logSurprisalText = atanh(cor(logSurprisalText, n400_max_magnitude_CPz)),
    cor_logFixDur_logSurprisalText = atanh(cor(logSurprisalText, logFixDur)),
    meanLogSurprisalText=mean(logSurprisalText)
) %>% drop_na() %>% 
  filter(count>2) # remove instances with single or two fixation on page as correlation coeff will be 1 or -1
eml_page[Reduce(`&`, lapply(eml_page, is.finite)),]
## # A tibble: 0 × 8
## # Groups:   pID [0]
## # … with 8 variables: pID <chr>, identifier <fct>, count <int>,
## #   cor_n400meanCPz_logSurprisalText <dbl>,
## #   cor_n400minCPz_logSurprisalText <dbl>,
## #   cor_n400maxCPz_logSurprisalText <dbl>,
## #   cor_logFixDur_logSurprisalText <dbl>, meanLogSurprisalText <dbl>
df_page <- left_join(eml_page, df_comp, on=c('pID', 'identifer')) 
df_page$MW = as.factor(df_page$MW)


# plot the new metrics
p1<-ggplot(df_page %>% drop_na(MW))+
  geom_density( aes(x=cor_n400meanCPz_logSurprisalText, color=MW))
p2<-ggplot(df_page %>% drop_na(MW))+
  geom_density( aes(x=cor_logFixDur_logSurprisalText, color=MW))
p3<-ggplot(df_page %>% drop_na(MW))+
  geom_density( aes(x=cor_n400minCPz_logSurprisalText, color=MW))
p4<-ggplot(df_page %>% drop_na(MW))+
  geom_density( aes(x=cor_n400maxCPz_logSurprisalText, color=MW))
grid.arrange(p1,p2,p3,p4)

# plot by participantID
ggplot(df_page, aes(x=cor_n400meanCPz_logSurprisalText, y=pID)) +
  geom_boxplot(  horizontal = TRUE,        outlier.colour="red",
        outlier.fill="red",
        outlier.size=0.2)

binomial regression: predict page level MW from page level correlations with surprisal

m.mw.mean <- glmer(MW ~ cor_n400meanCPz_logSurprisalText + (1|identifier) + (1+cor_n400meanCPz_logSurprisalText|pID), data=df_page %>% drop_na(MW), family = "binomial")
plot_model(m.mw.mean,type = "emm", terms=c("cor_n400meanCPz_logSurprisalText"))

m.mw.min <- glmer(MW ~ cor_n400minCPz_logSurprisalText  + (1|identifier) + (1+cor_n400minCPz_logSurprisalText|pID), data=df_page %>% drop_na(MW), family = "binomial")
plot_model(m.mw.min,type = "emm", terms=c("cor_n400minCPz_logSurprisalText"))

m.mw.max <- glmer(MW ~ cor_n400maxCPz_logSurprisalText  + (1|identifier) + (1+cor_n400maxCPz_logSurprisalText|pID), data=df_page %>% drop_na(MW), family = "binomial")
plot_model(m.mw.max,type = "emm", terms=c("cor_n400maxCPz_logSurprisalText"))

m.mw.all <- glmer(MW ~ cor_n400meanCPz_logSurprisalText + cor_n400minCPz_logSurprisalText  +cor_n400maxCPz_logSurprisalText  + (1|identifier) + (1|pID), data=df_page %>% drop_na(MW), family = "binomial")

tab_model(m.mw.mean, m.mw.min, m.mw.max)
  MW MW MW
Predictors Odds Ratios CI p Odds Ratios CI p Odds Ratios CI p
(Intercept) 0.47 0.47 – 0.47 <0.001 0.48 0.28 – 0.82 0.008 0.48 0.28 – 0.83 0.008
cor n400meanCPz
logSurprisalText
0.36 0.36 – 0.36 <0.001
cor n400minCPz
logSurprisalText
0.33 0.08 – 1.38 0.130
cor n400maxCPz
logSurprisalText
0.60 0.16 – 2.21 0.442
Random Effects
σ2 3.29 3.29 3.29
τ00 1.27 pID 1.26 pID 1.22 pID
0.69 identifier 0.68 identifier 0.66 identifier
τ11 0.07 pID.cor_n400meanCPz_logSurprisalText 0.04 pID.cor_n400minCPz_logSurprisalText 0.95 pID.cor_n400maxCPz_logSurprisalText
ρ01 -1.00 pID -1.00 pID -1.00 pID
ICC 0.37 0.37 0.38
N 20 identifier 20 identifier 20 identifier
91 pID 91 pID 91 pID
Observations 287 287 287
Marginal R2 / Conditional R2 0.023 / 0.389 0.021 / 0.384 0.005 / 0.381
tab_model(m.mw.mean)
  MW
Predictors Odds Ratios CI p
(Intercept) 0.47 0.47 – 0.47 <0.001
cor n400meanCPz
logSurprisalText
0.36 0.36 – 0.36 <0.001
Random Effects
σ2 3.29
τ00 pID 1.27
τ00 identifier 0.69
τ11 pID.cor_n400meanCPz_logSurprisalText 0.07
ρ01 pID -1.00
ICC 0.37
N identifier 20
N pID 91
Observations 287
Marginal R2 / Conditional R2 0.023 / 0.389
tab_model(m.mw.all)
  MW
Predictors Odds Ratios CI p
(Intercept) 0.47 0.27 – 0.83 0.010
cor n400meanCPz
logSurprisalText
0.16 0.00 – 25.31 0.474
cor n400minCPz
logSurprisalText
1.06 0.02 – 44.93 0.976
cor n400maxCPz
logSurprisalText
2.40 0.12 – 46.38 0.563
Random Effects
σ2 3.29
τ00 pID 1.32
τ00 identifier 0.71
ICC 0.38
N identifier 20
N pID 91
Observations 287
Marginal R2 / Conditional R2 0.031 / 0.401
emmeans(m.mw.mean, pairwise ~ cor_n400meanCPz_logSurprisalText, p.adjust = "fdr")
## $emmeans
##  cor_n400meanCPz_logSurprisalText  emmean       SE  df asymp.LCL asymp.UCL
##                         -0.002209 -0.7456 0.001643 Inf   -0.7488   -0.7424
## 
## Results are given on the logit (not the response) scale. 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast  estimate SE df z.ratio p.value
##  (nothing)   nonEst NA NA      NA      NA
## 
## Note: contrasts are still on the logit scale

Does page-average surprisal predict MW

m.mw.sp <- glmer(MW ~ meanLogSurprisalText  +  (1+meanLogSurprisalText|pID), data=df_page %>% drop_na(MW), family = "binomial")
tab_model(m.mw.sp)
  MW
Predictors Odds Ratios CI p
(Intercept) 0.25 0.08 – 0.81 0.020
meanLogSurprisalText 1.96 0.61 – 6.24 0.256
Random Effects
σ2 3.29
τ00 pID 1.91
τ11 pID.meanLogSurprisalText 4.98
ρ01 pID -0.93
ICC 0.36
N pID 91
Observations 287
Marginal R2 / Conditional R2 0.010 / 0.367